Final Report¶
Instructions¶
- Read the instructions for the final assignment from the course page.
- To get help with Markdown formatting check this page.
- If you don't know how to add a Markdown cell check this page
Download the Notebook template¶
You can download this tutorial Notebook to your own computer by clicking the Download button from the Menu on the top-right section of the website.
- Right-click the option that says
.ipynb and choose "Save link as ..."

Add your work below¶
Mix markdown cells with code cells regularly to explain and document your work/code (in a similar manner as how the Exercises of this course were written). Below we provide a suggestion for the structure of the report by listing the main components that would be good to include in your work. However, you do not necessarily need to follow this structure, and you can adjust the headings as you see best.
1. Introduction¶
Add introduction.
2. Data and methods¶
Describe your data (list also the used data sources) and give an overview of the methods that you use in your work.
3. Data analysis + Results¶
Add and explain your analysis workflow below here.
# Add your data analysis here by adding code cells as you see best
Prepare Population data¶
population_density.ipynb
import rioxarray
import datashader as ds
import xarray as xr
import numpy as np
from colorcet import palette
from datashader import transfer_functions as tf


def render_population_raster(data_array, size):
    """Render a WorldPop population raster with datashader eq_hist shading.

    Parameters
    ----------
    data_array : xarray.DataArray
        Raster opened with rioxarray, layout (band, y, x); cells <= 0 are
        treated as no-data.
    size : int
        Output image width in pixels; height follows the raster aspect ratio.

    Returns
    -------
    datashader Image with a black background.
    """
    arr = xr.DataArray(data_array)[0]      # first (and only) band
    arr = arr.where(arr > 0)               # mask no-data / empty cells
    arr = arr.compute()                    # materialise the dask-backed array
    arr = np.flip(arr, 0)                  # row 0 is the northern edge in the GeoTIFF
    aspect = arr.shape[0] / arr.shape[1]
    canvas = ds.Canvas(plot_width=size, plot_height=int(aspect * size))
    raster = canvas.raster(arr)
    img = tf.shade(raster, how="eq_hist", cmap=palette["fire"])
    return tf.set_background(img, "black")


# Austria population raster
file_path = "data/aut_ppp_2020_constrained.tif"
data_array_au = rioxarray.open_rasterio(file_path, chunks=True, lock=False)
data_array_au
img = render_population_raster(data_array_au, size=1200)
img

# Germany population raster
file_path = "data/deu_ppp_2020_constrained.tif"
data_array_deu = rioxarray.open_rasterio(file_path, chunks=True, lock=False)
data_array_deu
img = render_population_raster(data_array_deu, size=1000)
img

import osmnx as ox
import geopandas as gpd
from shapely.geometry import Polygon
from shapely.ops import unary_union


def get_city_boundary(place_name):
    """Geocode *place_name* with OSMnx and return a one-row boundary GeoDataFrame.

    The geometry returned by OSMnx can be a Polygon (single continuous
    boundary) or a MultiPolygon (discontinuous boundary, merged here with
    unary_union); both cases are handled.
    """
    city = ox.geocode_to_gdf(place_name)
    geom = city.geometry.iloc[0]
    boundary = geom if isinstance(geom, Polygon) else unary_union(city.geometry)
    return gpd.GeoDataFrame(index=[0], crs=city.crs, geometry=[boundary])


vienna_boundary = get_city_boundary("Vienna, Austria")
vienna_boundary.plot()

berlin_boundary = get_city_boundary("Berlin, Germany")
berlin_boundary.plot()

import pandas as pd
from shapely.geometry import Point
from tqdm import tqdm

# Turn the Austrian raster into a (band, lat, lon) -> population table.
# BUG FIX: the original cell referenced an undefined name `data_array`;
# the Austrian raster `data_array_au` is the intended input here.
df = pd.DataFrame(data_array_au.to_series(), columns=['population']).dropna()

# Vienna bounding box for a cheap coordinate pre-filter before the exact
# polygon join (the original re-geocoded the city into a GeoDataFrame
# misleadingly named `munich_boundary`; `vienna_boundary` above is the same).
minx, miny, maxx, maxy = vienna_boundary.bounds.iloc[0]


def process_batch(df_batch):
    """Keep raster cells whose centre falls inside Vienna's bounding box."""
    geodata = []
    for index, row in df_batch.iterrows():
        band, lat, lon = index  # MultiIndex layout produced by to_series()
        if minx <= lon <= maxx and miny <= lat <= maxy:
            geodata.append({'geometry': Point(lon, lat),
                            'population': row['population']})
    return geodata


batch_size = 10000  # adjust this based on your system's capabilities
geodata_total = []
for start in tqdm(range(0, len(df), batch_size), desc="Processing"):
    geodata_total.extend(process_batch(df.iloc[start:start + batch_size]))

gdf_vienna = gpd.GeoDataFrame(geodata_total)

# WorldPop rasters are in WGS84: set the CRS *before* the spatial join.
# (The original joined first with the removed `op=` keyword, set the CRS,
# then joined a second time; one join with `predicate=` is sufficient.)
if gdf_vienna.crs is None:
    gdf_vienna.set_crs('EPSG:4326', inplace=True)
if gdf_vienna.crs != vienna_boundary.crs:
    gdf_vienna = gdf_vienna.to_crs(vienna_boundary.crs)

gdf_vienna = gpd.sjoin(gdf_vienna, vienna_boundary, how='inner', predicate='within')
gdf_vienna = gdf_vienna.drop(columns='index_right', errors='ignore')

# Drop WorldPop's no-data marker.
gdf_vienna = gdf_vienna[gdf_vienna["population"] != -99999.000000]
print(gdf_vienna)
gdf_vienna

import matplotlib.pyplot as plt

f, ax = plt.subplots(1, 1, figsize=(15, 15))
vienna_boundary.plot(ax=ax, color='k', edgecolor='orange', linewidth=3)
gdf_vienna.plot(column='population', cmap='inferno', ax=ax, alpha=0.9, markersize=0.25)
ax.axis('off')
f.patch.set_facecolor('black')

# Persist for the accessibility notebook.
output_file_path = "data/vienna_population.geojson"
gdf_vienna.to_file(output_file_path, driver='GeoJSON')

# %% [markdown]
# ## Urban Pedestrian Accessibility
urbanAccessDefi.ipynb
import osmnx as ox               # version: 1.0.1
import matplotlib.pyplot as plt  # version: 3.7.1

# Administrative boundaries of the two study cities.
admin = {}
cities = ['Vienna', 'Berlin']
f, ax = plt.subplots(1, 2, figsize=(15, 5))
for idx, city in enumerate(cities):
    admin[city] = ox.geocode_to_gdf(city)
    admin[city].plot(ax=ax[idx], color='none', edgecolor='k', linewidth=2)
    ax[idx].set_title(city, fontsize=16)

from matplotlib.colors import LinearSegmentedColormap

# Custom two-colour gradient (orange -> "Very Peri" purple).
very_peri = '#820A98'
second_color = '#FEA214'
colors = [second_color, very_peri]
n_bins = 50
cmap_name = "VeryPeri"
colormap = LinearSegmentedColormap.from_list(cmap_name, colors, N=n_bins)

import geopandas as gpd  # version: 0.9.0

# Load the per-city population point layers produced by the population notebook.
demographics = {}
f, ax = plt.subplots(1, 2, figsize=(15, 5))
for idx, city in enumerate(cities):
    city_data = gpd.read_file('data/' + city.lower() + '_population.geojson')
    # Filter out rows carrying WorldPop's no-data marker
    city_data = city_data[city_data["population"] != -99999.000000]
    demographics[city] = city_data[['population', 'geometry']]
    admin[city].plot(ax=ax[idx], color='none', edgecolor='k', linewidth=3)
    demographics[city].plot(column='population', cmap=colormap, ax=ax[idx],
                            alpha=0.9, markersize=0.25)
    ax[idx].set_title('Population density\n in ' + city, fontsize=16)
    ax[idx].axis('off')
plt.show()

# Public-transport stop positions from OpenStreetMap.
cities = {'Vienna': 'Austria', 'Berlin': 'Germany'}
tags = {'public_transport': 'stop_position'}
gdf_units = {}
admin = {}
for city, country in cities.items():
    # Geocode with the country included to avoid ambiguous matches.
    area = ox.geocode_to_gdf(f"{city}, {country}")
    admin[city] = area
    gdf_units[city] = ox.features_from_place(f"{city}, {country}", tags=tags)

f, ax = plt.subplots(1, 2, figsize=(15, 5))
very_peri = "#9c9ed9"  # colour for markers
for idx, city in enumerate(cities):
    admin[city].plot(ax=ax[idx], color='none', edgecolor='k', linewidth=3)
    if not gdf_units[city].empty:
        gdf_units[city].plot(ax=ax[idx], alpha=0.9, color=very_peri, markersize=6.0)
    ax[idx].set_title(f'Locations of Public Transport Stops in {city}', fontsize=16)
    ax[idx].axis('off')
plt.tight_layout()
plt.show()

import os
import pandana
import pandas as pd
import numpy as np
from shapely.geometry import Point
import geopandas as gpd
from pandana.loaders import osm
from tqdm import tqdm


def get_city_accessibility(city, admin, POIs):
    """Walking time from every street-network node to its nearest POI.

    Parameters
    ----------
    city : str
        Used only to name the output GeoJSON file.
    admin : GeoDataFrame
        One-row administrative boundary of the city (WGS84).
    POIs : GeoDataFrame
        Point features (e.g. public-transport stops) to measure access to.

    Returns
    -------
    GeoDataFrame with columns ['time', 'geometry'] — minutes of walking to
    the nearest POI per network node, clipped to *admin*. Also written to
    '<city>_accessibility.geojson'.
    """
    # walkability parameters
    walkingspeed_kmh = 5
    walkingspeed_mm = walkingspeed_kmh * 1000 / 60  # metres per minute
    distance = 100  # POI search radius in metres

    # bounding box as a list of llcrnrlat, llcrnrlng, urcrnrlat, urcrnrlng
    minx, miny, maxx, maxy = admin.bounds.T[0].to_list()
    bbox = [miny, minx, maxy, maxx]

    # going for the nearest POI only
    num_pois = 1

    # network distances are cached on disk per bounding box
    bbox_string = '_'.join([str(x) for x in bbox])
    net_filename = 'data/network_{}.h5'.format(bbox_string)
    if not os.path.exists('data'):
        os.makedirs('data')

    if os.path.isfile(net_filename):
        network = pandana.network.Network.from_hdf5(net_filename)
    else:
        network = osm.pdna_network_from_bbox(bbox[0], bbox[1], bbox[2], bbox[3])
        # prune poorly connected nodes before saving
        lcn = network.low_connectivity_nodes(impedance=1000, count=10,
                                             imp_name='distance')
        network.save_hdf5(net_filename, rm_nodes=lcn)
        # NOTE(review): precompute only runs on a freshly built network here,
        # mirroring the original cell order — confirm the cached path does not
        # also need it.
        network.precompute(distance + 1)

    # register the POIs on the network and query nearest-POI distances
    pois = POIs.copy()
    pois['lon'] = pois.geometry.x  # vectorised; was a per-row lambda
    pois['lat'] = pois.geometry.y
    pois = pois.drop(columns=['geometry'])
    network.set_pois(category='all', x_col=pois['lon'], y_col=pois['lat'],
                     maxdist=distance, maxitems=num_pois)
    all_access = network.nearest_pois(distance=distance, category='all',
                                      num_pois=num_pois)

    # distance -> walking time per node, then clip to the admin polygon
    nodes = network.nodes_df
    nodes_acc = nodes.merge(all_access[[1]], left_index=True,
                            right_index=True).rename(columns={1: 'distance'})
    nodes_acc['time'] = nodes_acc.distance / walkingspeed_mm
    nodes_acc['geometry'] = [Point(x, y) for x, y in zip(nodes_acc.x, nodes_acc.y)]
    nodes_acc = gpd.GeoDataFrame(nodes_acc)
    nodes_acc = gpd.overlay(nodes_acc, admin)
    nodes_acc[['time', 'geometry']].to_file(city + '_accessibility.geojson',
                                            driver='GeoJSON')
    return nodes_acc[['time', 'geometry']]

# %% [markdown]
# ### Vienna
city = 'Vienna'
admin_vienna = admin['Vienna']
gdf_units_vienna = gdf_units['Vienna']
accessibility_vienna = get_city_accessibility(city, admin_vienna, gdf_units_vienna)

f, ax = plt.subplots(1, 1, figsize=(15, 8))
# Plot the administrative boundary of Vienna (the original comments said
# "Munich" — copy-paste leftovers from the source tutorial).
admin_vienna.plot(ax=ax, color='k', edgecolor='k', linewidth=3)
# Per-node walking time to the nearest stop
accessibility_vienna.plot(column='time', cmap='RdYlGn_r', legend=True, ax=ax,
                          markersize=1, alpha=0.8)
gdf_units_vienna.plot(ax=ax, color='blue', markersize=5, alpha=0.7)
ax.set_title('Stop Accessibility in Minutes\nVienna', pad=40, fontsize=24)
ax.axis('off')
plt.show()

import h3                              # version: 3.7.3
from shapely.geometry import Polygon   # version: 1.7.1
import numpy as np


def split_admin_boundary_to_hexagons(admin_gdf, resolution):
    """Tile the first geometry of *admin_gdf* with H3 hexagons.

    Returns a GeoDataFrame with columns ['hex_id', 'geometry'] in WGS84.
    NOTE(review): for a MultiPolygon this concatenates all exterior rings
    into one ring and ignores holes — an approximation inherited from the
    original; confirm it is acceptable for these city shapes.
    """
    geom = admin_gdf.geometry.iloc[0]
    coords = []
    if geom.geom_type == 'MultiPolygon':
        for polygon in geom.geoms:
            coords.extend(list(polygon.exterior.coords))
    else:
        coords = list(geom.exterior.coords)
    admin_geojson = {"type": "Polygon", "coordinates": [coords]}
    hexagons = h3.polyfill(admin_geojson, resolution, geo_json_conformant=True)
    hexagon_geometries = {hex_id: Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True))
                          for hex_id in hexagons}
    return gpd.GeoDataFrame(hexagon_geometries.items(),
                            columns=['hex_id', 'geometry'])


resolution = 8
hexagons_gdf = split_admin_boundary_to_hexagons(admin[city], resolution)
hexagons_gdf.plot()

# Compare hexagon grids at three resolutions for both cities.
for resolution in [7, 8, 9]:
    admin_h3 = {}
    for city in cities:
        admin_h3[city] = split_admin_boundary_to_hexagons(admin[city], resolution)
    f, ax = plt.subplots(1, 2, figsize=(15, 5))
    for idx, city in enumerate(cities):
        admin[city].plot(ax=ax[idx], color='none', edgecolor='k', linewidth=3)
        admin_h3[city].plot(ax=ax[idx], alpha=0.8, edgecolor='k', color='none')
        ax[idx].set_title(city + ' (resolution = ' + str(resolution) + ')',
                          fontsize=14)
        ax[idx].axis('off')

accessibilities = accessibility_vienna

# Hexagon grid used for the aggregation below.
resolution = 9
admin_h3_vienna = split_admin_boundary_to_hexagons(admin['Vienna'], resolution)
# H3 emits WGS84 coordinates: declare the CRS once, reproject once.
# BUG FIX: the original reprojected, then *reassigned* crs='EPSG:4326' on the
# already-reprojected frame, then reprojected again — mislabeling the data.
admin_h3_vienna.crs = 'EPSG:4326'
admin_h3_vienna = admin_h3_vienna.to_crs(demographics['Vienna'].crs)

f, ax = plt.subplots(figsize=(15, 5))
admin['Vienna'].plot(ax=ax, color='none', edgecolor='k', linewidth=3)
admin_h3_vienna.plot(ax=ax, alpha=0.8, edgecolor='k', color='none')
ax.set_title('Vienna (resolution = ' + str(resolution) + ')', fontsize=14)
ax.axis('off')
plt.show()

# Aggregate total population per hexagon.
demographics_dict = gpd.sjoin(admin_h3_vienna, demographics['Vienna']) \
    .groupby('hex_id').sum('population').to_dict()['population']
demographics_h3_vienna = admin_h3_vienna.copy()
demographics_h3_vienna['population'] = demographics_h3_vienna.hex_id.map(demographics_dict)

# Aggregate mean stop-reachability time per hexagon.
accessibility_h3_vienna = admin_h3_vienna.copy()
accessibility_dict = gpd.sjoin(admin_h3_vienna, accessibility_vienna) \
    .groupby('hex_id').mean('time').to_dict()['time']
accessibility_h3_vienna['time'] = accessibility_h3_vienna.hex_id.map(accessibility_dict)

f, ax = plt.subplots(2, 1, figsize=(15, 15))
demographics_h3_vienna.plot(column='population', legend=True, cmap=colormap,
                            ax=ax[0], alpha=0.9, markersize=0.25)
accessibility_h3_vienna.plot(column='time', cmap='RdYlGn_r', legend=True, ax=ax[1])
ax[0].set_title('Population level in Vienna', fontsize=16)
# BUG FIX: the plot shows Vienna; the original title said "Munich".
ax[1].set_title('Stop reachability time in Vienna', fontsize=16)
for ax_i in ax:
    ax_i.axis('off')
plt.show()

# Fraction of population within each walking-time threshold.
f, ax = plt.subplots(figsize=(15, 5))
total_pop_vienna = demographics_h3_vienna['population'].sum()
merged_vienna = demographics_h3_vienna.merge(
    accessibility_h3_vienna.drop(columns=['geometry']), on='hex_id')
time_thresholds = range(10)
population_reached_vienna = [
    100 * merged_vienna[merged_vienna['time'] < limit]['population'].sum() / total_pop_vienna
    for limit in time_thresholds]
ax.plot(time_thresholds, population_reached_vienna, linewidth=3, color=very_peri)
ax.set_xlabel('Reachability time (min)', fontsize=14, labelpad=12)
ax.set_ylabel('Fraction of population reached (%)', fontsize=14, labelpad=12)
ax.set_xlim([0, 10])
ax.set_ylim([0, 100])
ax.set_title('Fraction of population vs Stop\naccessibility in Vienna',
             pad=20, fontsize=16)
plt.show()

# Same for Berlin
GTFS Transportation Patterns¶
GTFSTransportaionPatterns.ipynb
import os

root = 'gtfs'
cities = ['Berlin', 'Wien']
# newest GTFS feed archive per city (first entry of the city directory)
updated = {city: [f for f in os.listdir(root + '/' + city)][0] for city in cities}
updated

import zipfile

# Peek into each archive and list its member files.
for city in cities:
    zip_path = os.path.join(root, city, updated[city])
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_contents = zip_ref.namelist()
        print(len(zip_contents), sorted(zip_contents), '\n')

import geopandas as gpd
import pandas as pd
import osmnx as ox
import matplotlib.pyplot as plt
# BUG FIX: LineString was never imported in this notebook but is used below
# to build route-shape geometries (NameError in the original).
from shapely.geometry import Point, LineString
import io

# NOTE(review): this cell switches to 'data/gtfs' while the cell above used
# 'gtfs' — confirm which directory actually holds the feeds.
root = 'data/gtfs'
cities = ['Wien', 'Berlin']
updated = {city: [f for f in os.listdir(os.path.join(root, city))][0] for city in cities}

stops = {}
admin = {}
f, ax = plt.subplots(2, 1, figsize=(15, 15))  # 2x1 grid of subplots
for idx, city in enumerate(cities):
    bx = ax[idx]
    # Extract stops.txt from the feed archive into a temporary directory
    zip_path = os.path.join(root, city, updated[city])
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        stops_path = zip_ref.extract('stops.txt', '/tmp')
    df_stops = pd.read_csv(stops_path)
    geometry = [Point(xy) for xy in zip(df_stops['stop_lon'], df_stops['stop_lat'])]
    gdf_stops = gpd.GeoDataFrame(df_stops, geometry=geometry)
    gdf_stops.crs = 'EPSG:4326'
    admin_ = ox.geocode_to_gdf(city)
    gdf_stops.plot(ax=bx, markersize=3, alpha=0.5)
    admin_.plot(ax=bx, color='none', edgecolor='k', linewidth=4)
    bx.set_title(city, fontsize=15)
    bx.axis('off')
    stops[city] = gpd.overlay(gdf_stops, admin_)  # clip stops to the admin polygon
    admin[city] = admin_
plt.tight_layout()
plt.show()

# Re-plot only the clipped stops.
f, ax = plt.subplots(2, 1, figsize=(15, 15))
for idx, (city, admin_) in enumerate(admin.items()):
    bx = ax[idx]
    gdf_stops = stops[city]
    gdf_stops.plot(ax=bx, markersize=3, alpha=0.5)
    admin_.plot(ax=bx, color='none', edgecolor='k', linewidth=4)
    bx.set_title(city, fontsize=15)
    bx.axis('off')
plt.tight_layout()
plt.show()

from datetime import datetime


def parse_time_string(time_string):
    """Parse a GTFS 'HH:MM:SS' departure time into a datetime.

    GTFS allows hours >= 24 for trips running past midnight; wrap them back
    into the 0-23 range. Using modulo also handles hours >= 48, which the
    original single `-= 24` did not.
    """
    hour_value = int(time_string.split(':')[0])
    if hour_value > 23:
        hour_value %= 24
        time_string = str(hour_value) + ':' + time_string.split(':', 1)[1]
    parsed_time = datetime.strptime(time_string, "%H:%M:%S")
    return parsed_time


root = 'gtfs'
cities = ['Wien', 'Berlin']
updated = {city: [f for f in os.listdir(os.path.join(root, city))][0] for city in cities}

stop_times = {}
for city in cities:
    print(city)
    # Extract stop_times.txt from the feed archive
    zip_path = os.path.join(root, city, updated[city])
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        stop_times_path = zip_ref.extract('stop_times.txt', '/tmp')
    df_stop_times = pd.read_csv(stop_times_path)
    df_stop_times['departure_time'] = df_stop_times['departure_time'].apply(parse_time_string)
    stop_times[city] = df_stop_times

for city, df_stop_times in stop_times.items():
    print(city, len(df_stop_times))
# First three rows of the last city processed
df_stop_times.head(3)

# Histogram of departures per hour-minute for each city.
f, ax = plt.subplots(2, 1, figsize=(15, 20))
for idx, (city, df_stop_times) in enumerate(stop_times.items()):
    bx = ax[idx]
    df_stop_times['hour_minute'] = df_stop_times['departure_time'].dt.strftime('%H:%M')
    departure_counts = df_stop_times['hour_minute'].value_counts().sort_index()
    departure_counts.plot(kind='bar', color='steelblue', alpha=0.8, width=0.8, ax=bx)
    bx.set_xlabel('Hour-Minute of Departure', fontsize=18)
    bx.set_ylabel('Number of Departures', fontsize=18)
    bx.set_title('Histogram of Departure Times by Hour-Minute in ' + city, fontsize=26)
    # label only the full hours to keep the axis readable
    bx.set_xticks([ijk for ijk, i in enumerate(departure_counts.index) if ':00' in i])
    bx.set_xticklabels([i for i in departure_counts.index if ':00' in i])
plt.tight_layout()

import geopandas as gpd                # version: 0.13.1
import h3                              # version: 3.7.3
from shapely.geometry import Polygon   # version: 1.8.5.post1
import numpy as np                     # version: 1.22.4


def split_admin_boundary_to_hexagons(admin_gdf, resolution):
    """Tile the first geometry of *admin_gdf* with H3 hexagons.

    Returns a GeoDataFrame with columns ['hex_id', 'geometry'] in WGS84.
    NOTE(review): MultiPolygon exteriors are concatenated into one ring and
    holes are ignored — an approximation inherited from the original.
    """
    geom = admin_gdf.geometry.iloc[0]
    coords = []
    if geom.geom_type == 'MultiPolygon':
        for polygon in geom.geoms:
            coords.extend(list(polygon.exterior.coords))
    else:
        coords = list(geom.exterior.coords)
    admin_geojson = {"type": "Polygon", "coordinates": [coords]}
    hexagons = h3.polyfill(admin_geojson, resolution, geo_json_conformant=True)
    hexagon_geometries = {hex_id: Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True))
                          for hex_id in hexagons}
    return gpd.GeoDataFrame(hexagon_geometries.items(),
                            columns=['hex_id', 'geometry'])


# let's test two resolutions for Berlin
hexagons_gdf_h8 = split_admin_boundary_to_hexagons(admin['Berlin'], 8)
hexagons_gdf_h9 = split_admin_boundary_to_hexagons(admin['Berlin'], 9)

from matplotlib.colors import LogNorm

resolution = 8
for city in cities:
    # create the hexagon grid
    hexagons_gdf = split_admin_boundary_to_hexagons(admin[city], resolution)
    # merge stops and stoppings
    gdf_stop_times = stops[city].merge(stop_times[city],
                                       left_on='stop_id', right_on='stop_id')
    # number of unique stops and total stoppings per hexagon
    gdf_stops = gpd.sjoin(gdf_stop_times, hexagons_gdf)
    nunique = gdf_stops.groupby(by='hex_id').nunique().to_dict()['stop_id']
    total = gdf_stops.groupby(by='hex_id').count().to_dict()['stop_id']
    hexagons_gdf['nunique'] = hexagons_gdf.hex_id.map(nunique).fillna(0)
    hexagons_gdf['total'] = hexagons_gdf.hex_id.map(total).fillna(0)
    # visualize the number of stops and stoppings
    f, ax = plt.subplots(1, 2, figsize=(15, 5))
    plt.suptitle(city + ', resolution = ' + str(resolution), fontsize=25)
    hexagons_gdf.plot(column='nunique', cmap='RdYlGn', legend=True, ax=ax[0])
    hexagons_gdf.plot(column='total', cmap='RdYlGn', legend=True, ax=ax[1])
    # for log-scaled coloring:
    # hexagons_gdf.plot(column='total', cmap='RdYlGn', legend=True, ax=ax[1],
    #                   norm=LogNorm(vmin=1, vmax=hexagons_gdf.total.max()))
    ax[0].set_title('Number of unique stops', fontsize=17)
    ax[1].set_title('Number of stoppings', fontsize=17)  # fixed title typo
    for aax in ax:
        aax.axis('off')

# GTFS route_type code -> human-readable label (full and simplified).
map_complete = {
    0: 'Tram, Streetcar, Light rail', 1: 'Subway, Metro', 2: 'Rail',
    3: 'Bus', 4: 'Ferry', 11: 'Trolleybus', 100: 'Railway Service',
    109: 'Suburban Railway', 400: 'Urban Railway Service',
    401: 'Metro Service', 700: 'Bus Service', 717: 'Regional Bus',
    900: 'Tram Service', 1000: 'Water Transport Service'}
map_simple = {
    0: 'Tram', 1: 'Subway', 2: 'Railway', 3: 'Bus', 4: 'Ferry',
    11: 'Trolleybus', 100: 'Railway', 109: 'Railway', 400: 'Railway',
    401: 'Subway', 700: 'Bus', 717: 'Bus', 900: 'Tram', 1000: 'Ferry'}

from collections import Counter


def format_axis(ax):
    """Cosmetic axis formatting: hide top/right spines, bold ticks, 12-pt labels."""
    for pos in ['right', 'top']:
        ax.spines[pos].set_edgecolor('w')
    for pos in ['bottom', 'left']:
        ax.spines[pos].set_edgecolor('k')
    ax.tick_params(axis='x', length=6, width=2, colors='k')
    ax.tick_params(axis='y', length=6, width=2, colors='k')
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_fontsize(12)


root = 'gtfs'
cities = ['Wien', 'Berlin']
updated = {city: [f for f in os.listdir(os.path.join(root, city))][0] for city in cities}

# colour palette per means of transport (hoisted out of the loop — invariant)
transport_colors = {'Railway': '#A65C47', 'Bus': '#0BB3D9', 'Tram': '#F2B705',
                    'Ferry': '#997CA6', 'Trolleybus': '#D91818', 'Subway': '#0869A6'}

f, ax = plt.subplots(1, 2, figsize=(15, 4))
routes = {}
for idx, city in enumerate(cities):
    # Extract routes.txt from the feed archive
    zip_path = os.path.join(root, city, updated[city])
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        routes_path = zip_ref.extract('routes.txt', '/tmp')
    df_route = pd.read_csv(routes_path)
    df_route['route_type'] = df_route['route_type'].astype(int)
    df_route['route_type_en'] = df_route['route_type'].map(map_simple)
    D = dict(Counter(df_route['route_type_en']))
    routes[city] = df_route
    # Bar chart of route counts per transport mode (log scale)
    labels = [str(label) for label in D.keys()]
    values = list(D.values())
    colors = [transport_colors.get(label, '#333333') for label in labels]
    bars = ax[idx].bar(labels, values, color=colors)
    ax[idx].set_xticks(range(len(labels)))
    ax[idx].set_xticklabels(labels, fontsize=10, rotation=60, ha='right')
    format_axis(ax[idx])
    ax[idx].set_title(city, fontsize=18)
    ax[idx].set_ylabel('Number of routes', fontsize=15)
    ax[idx].set_yscale('log')
plt.tight_layout()

import contextily as ctx


def plot_city_route_shapes(city, shapes_path, trips_path, out_png):
    """Draw every route shape of *city* coloured by means of transport.

    Reads the GTFS shapes and trips files, builds LineStrings per shape_id,
    keeps only shapes intersecting the city polygon, and saves the figure to
    *out_png*. Relies on the module-level `admin`, `routes` and
    `transport_colors` built above.
    """
    df_shapes = pd.read_csv(shapes_path)
    # shape points -> per-shape LineStrings
    df_shapes['shape_pt_lat'] = df_shapes['shape_pt_lat'].astype(float)
    df_shapes['shape_pt_lon'] = df_shapes['shape_pt_lon'].astype(float)
    df_shapes['geometry'] = df_shapes.apply(
        lambda row: Point(row['shape_pt_lon'], row['shape_pt_lat']), axis=1)
    df_shapes = gpd.GeoDataFrame(df_shapes[['shape_id', 'geometry']])
    df_shapes.crs = 4326
    gdf_shapes = gpd.GeoDataFrame(
        df_shapes[['shape_id', 'geometry']].groupby(by='shape_id').agg(list))
    # a LineString needs at least two points
    gdf_shapes = gdf_shapes[[len(g) > 1 for g in gdf_shapes['geometry'].to_list()]]
    gdf_shapes['geometry'] = gdf_shapes['geometry'].apply(lambda x: LineString(x))
    gdf_shapes = gpd.GeoDataFrame(gdf_shapes)
    # keep routes with at least one segment inside the city
    gdf_shapes['shape_id'] = gdf_shapes.index
    shapes_in_city = set(gpd.overlay(gdf_shapes, admin[city]).shape_id.to_list())
    gdf_shapes = gdf_shapes[gdf_shapes.shape_id.isin(shapes_in_city)]
    # map the means of transport onto each shape via trips
    df_route = routes[city][['route_id', 'route_type_en']].drop_duplicates()
    df_trips = pd.read_csv(trips_path)[['route_id', 'shape_id']].drop_duplicates()
    df_trips = df_trips.merge(df_route, left_on='route_id', right_on='route_id')
    gdf_shapes = gdf_shapes.merge(df_trips, left_index=True, right_on='shape_id')
    # draw
    f, ax = plt.subplots(1, 1, figsize=(15, 15))
    admin[city].plot(ax=ax, edgecolor='k', color='none')
    admin[city].plot(ax=ax, edgecolor='k', alpha=0.52)
    for transport, color in transport_colors.items():
        gdf_shapes[gdf_shapes.route_type_en == transport].plot(
            ax=ax, color=color, alpha=0.9, linewidth=1)
    ax.axis('off')
    # contextily >= 1.0 uses `source=` (the old `url=` kwarg was removed)
    ctx.add_basemap(ax, alpha=0.8, crs=4326,
                    source=ctx.providers.CartoDB.DarkMatterNoLabels)
    plt.tight_layout()
    plt.savefig(out_png, dpi=150, bbox_inches='tight')
    plt.close()


city = 'Berlin'
df_routes = pd.read_csv('data/gtfs/shapes_berlin.txt')
df_routes

plot_city_route_shapes('Berlin', 'data/gtfs/shapes_berlin.txt',
                       'data/gtfs/trips_berlin.txt', 'figure7_Berlin_new.png')

city = 'Wien'
df_routes = pd.read_csv('data/gtfs/shapes_wien.txt')
df_routes

# BUG FIX: the original Wien cell looped over the leftover `cities = ['Berlin']`
# list, so it clipped the Wien shapes with Berlin's boundary and saved the
# figure under Berlin's name; calling the helper with explicit arguments fixes
# both.
plot_city_route_shapes('Wien', 'data/gtfs/shapes_wien.txt',
                       'data/gtfs/trips_wien.txt', 'figure7_Wien.png')

# %% [markdown]
# ## Trips per day
Trips per day in Vienna (the following cells load the Wien GTFS feed)
# --- Trips per day: load the Wien GTFS feed ----------------------------------
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import contextily as ctx
import pyproj
from shapely.geometry import Point, LineString
from zipfile import ZipFile, Path
import datetime
from matplotlib.colors import TwoSlopeNorm

# the timetable day analysed in the cells below
show_date_str = "2019-12-15"

with ZipFile("data/gtfs/Wien/gtfs.zip") as myzip:
    # Explicit dtypes: GTFS ids must stay strings, and the nullable 'Int64'
    # type tolerates missing values in integer columns.
    shapes_df = pd.read_csv(myzip.open("shapes.txt"), dtype={
        'shape_id': 'str', 'shape_pt_lat': 'float', 'shape_pt_lon': 'float',
        'shape_pt_sequence': 'Int64', 'shape_dist_traveled': 'float',
    })
    shapes_gdf = gpd.GeoDataFrame(
        shapes_df,
        geometry=gpd.points_from_xy(shapes_df.shape_pt_lon, shapes_df.shape_pt_lat),
    ).set_crs(epsg=4326)
    stops_df = pd.read_csv(myzip.open("stops.txt"), dtype={
        'stop_id': 'str', 'stop_code': 'str', 'stop_name': 'str',
        'stop_lat': 'float', 'stop_lon': 'float', 'location_type': 'Int64',
        'parent_station': 'str', 'wheelchair_boarding': 'str',
        'platform_code': 'str',
    })
    stops_gdf = gpd.GeoDataFrame(
        stops_df,
        geometry=gpd.points_from_xy(stops_df.stop_lon, stops_df.stop_lat),
    ).set_crs(epsg=4326)
    routes_df = pd.read_csv(myzip.open("routes.txt"), dtype={
        'route_id': 'str', 'agency_id': 'str', 'route_short_name': 'str',
        'route_long_name': 'str', 'route_desc': 'str', 'route_type': 'Int64',
        'route_color': 'str', 'route_text_color': 'str', 'exact_times': 'bool',
    })
    trips_df = pd.read_csv(myzip.open("trips.txt"), dtype={
        'route_id': 'str', 'service_id': 'str', 'trip_id': 'str',
        'shape_id': 'str', 'trip_headsign': 'str', 'direction_id': 'str',
        'block_id': 'str', 'wheelchair_accessible': 'str',
        'route_direction': 'str', 'trip_note': 'str', 'bikes_allowed': 'str',
    })
    # FIX(review): the original dtype dict listed 'stop_id' twice (the second
    # entry silently overwrote the first) and the call ended in a no-op
    # .astype({}); both removed.
    stop_times_df = pd.read_csv(myzip.open("stop_times.txt"), dtype={
        'trip_id': 'str', 'arrival_time': 'str', 'departure_time': 'str',
        'stop_id': 'str', 'stop_sequence': 'Int64', 'stop_headsign': 'str',
        'pickup_type': 'Int64', 'drop_off_type': 'Int64',
        'shape_dist_traveled': 'float', 'timepoint': 'bool', 'stop_note': 'str',
    })
    agency_df = pd.read_csv(myzip.open("agency.txt"), dtype={
        'agency_id': 'str', 'agency_name': 'str', 'agency_url': 'str',
        'agency_timezone': 'str', 'agency_lang': 'str', 'agency_phone': 'str',
    })
    calendar_df = pd.read_csv(myzip.open("calendar.txt"), dtype={
        'service_id': 'str', 'monday': 'bool', 'tuesday': 'bool',
        'wednesday': 'bool', 'thursday': 'bool', 'friday': 'bool',
        'saturday': 'bool', 'sunday': 'bool',
        'start_date': 'str', 'end_date': 'str',
    })
    calendar_dates_df = pd.read_csv(myzip.open("calendar_dates.txt"), dtype={
        'service_id': 'str', 'date': 'str', 'exception_type': 'Int64',
    })

# services running on that day
# Determine which service_ids run on the chosen date.
date = datetime.datetime.strptime(show_date_str, "%Y-%m-%d")
date_string = date.strftime("%Y%m%d")
day_of_week_name = date.strftime('%A').lower()

# Regular weekly schedule from calendar.txt: the weekday column must be set
# and the date must fall inside the service's validity window.
in_window = (date_string >= calendar_df.start_date) & (date_string <= calendar_df.end_date)
services_for_day_1 = calendar_df[(calendar_df[day_of_week_name]) & in_window].service_id.to_numpy()
print(f"scheduled for day based on calendar: {len(services_for_day_1)}")

# calendar_dates.txt exceptions:
#   exception_type 1 - Service has been added for the specified date.
#   exception_type 2 - Service has been removed for the specified date.
on_this_date = calendar_dates_df.date == date_string
services_added_for_day = calendar_dates_df[
    on_this_date & (calendar_dates_df.exception_type == 1)].service_id.to_numpy()
services_removed_for_day = calendar_dates_df[
    on_this_date & (calendar_dates_df.exception_type == 2)].service_id.to_numpy()
print(f"services added using calendar_dates: {len(services_added_for_day)}")
print(f"services removed using calendar_dates: {len(services_removed_for_day)}")

# final set = weekly schedule + added exceptions - removed exceptions
services_for_day_2 = np.concatenate([services_for_day_1, services_added_for_day])
services_for_day = np.setdiff1d(services_for_day_2, services_removed_for_day)
print(f"final services for day: {len(services_for_day)}")

# Get each route segment with start and end coordinates and sequence
# Build per-shape line segments: pair every shape point with its predecessor.
# NOTE(review): both the original and this version assume shapes_df is ordered
# by shape and ascending shape_pt_sequence, as GTFS shapes.txt usually is.
coords = shapes_df[["shape_pt_lat", "shape_pt_lon", "shape_pt_sequence"]]
coords_roll_1 = np.roll(coords, 1, axis=0)  # row i now holds point i-1 (wraps at row 0)
segments = pd.DataFrame(
    np.concatenate([coords_roll_1, coords], axis=1),
    columns=["start_lat", "start_lng", "start_seq", "end_lat", "end_lng", "end_seq"])
segments_df = shapes_df[["shape_id"]].join(segments)
# FIX(review): the original dropped rows with end_seq == 1, which assumes every
# shape's shape_pt_sequence starts at exactly 1.  GTFS only requires the
# sequence to be increasing, so instead drop every row whose predecessor
# belongs to a different shape (this also removes the wrapped first row).
same_shape = shapes_df["shape_id"].eq(shapes_df["shape_id"].shift()).to_numpy()
segments_df = segments_df[same_shape]
segments_df = segments_df.drop(columns=['end_seq']).rename(columns={"start_seq": "seq"})
segments_df
# Trips that actually operate on the chosen day, counted per shape.
day_trips = trips_df[trips_df.service_id.isin(services_for_day)]
# NOTE(review): this was called `sydney_bus_route_ids` in the original - a
# leftover from the Sydney example; it simply holds every route id in the
# feed, so the filter below keeps all of the day's trips.
feed_route_ids = routes_df.route_id.unique()
trips_on_feed_routes = day_trips[day_trips.route_id.isin(feed_route_ids)]
shape_day_trips = (trips_on_feed_routes
                   .groupby(by="shape_id")
                   .size()
                   .to_frame("day_trips"))
shape_day_trips
# How many trips traverse each individual segment per day.
# FIX(review): the merge below was executed twice back-to-back in the
# original cell; once is enough.
shape_segments_day_trips = pd.merge(segments_df, shape_day_trips,
                                    left_on="shape_id", right_index=True)
# Aggregate over identical segments shared by several shapes.
# NOTE(review): the positional argument of GroupBy.sum() is `numeric_only`,
# so passing "day_trips" merely acts as a truthy flag - every numeric column
# (including `seq`) is summed.  Kept as-is to preserve the result's columns;
# confirm that is acceptable.
segment_day_trips = shape_segments_day_trips.groupby(
    by=["start_lat", "start_lng", "end_lat", "end_lng"]).sum("day_trips").reset_index()
# Build a LineString for every aggregated segment and wrap it in a GeoDataFrame.
from shapely.geometry import Point, LineString

def get_line_string(row):
    """Return the straight line from the segment's start to its end point."""
    return LineString([Point(row.start_lng, row.start_lat),
                       Point(row.end_lng, row.end_lat)])

segment_day_trips['geometry'] = segment_day_trips.apply(get_line_string, axis=1)
# Convert to a GeoDataFrame, setting the geometry column (WGS84 lon/lat)
segment_day_trips_gpd = gpd.GeoDataFrame(segment_day_trips,
                                         geometry='geometry').set_crs(epsg=4326)
segment_day_trips_gpd
# --- Map: trips per day on each segment, Vienna ------------------------------
vmin = segment_day_trips_gpd["day_trips"].min()
# cap the colour scale at the 95th percentile so a few outliers do not wash
# out the rest of the network
vmax = segment_day_trips_gpd["day_trips"].quantile(.95)
vcenter = segment_day_trips_gpd["day_trips"].mean()
norm = TwoSlopeNorm(vmin=vmin, vcenter=vcenter, vmax=vmax)

fig = plt.figure(figsize=(30, 21))
ax = plt.axes()
ax.set(facecolor="black")

def limit_to_bounding_box(gdf, bounding_box):
    """Clip a GeoDataFrame to a dict with 'west'/'east'/'south'/'north' keys."""
    return gdf.cx[bounding_box["west"]:bounding_box["east"],
                  bounding_box["south"]:bounding_box["north"]]

# Define Wien bounding box
vienna_bb = {
    "north": 48.305,
    "south": 48.123,
    "west": 16.182,
    "east": 16.577,
}

# FIX(review): the original comment said "Filter ... for Berlin area", but
# this is the Vienna data.  The variable keeps its (misleading) `berlin_...`
# name only because the Berlin section below reuses it; renaming it there too
# would be the cleaner long-term fix.
berlin_segment_day_trips_gpd = limit_to_bounding_box(segment_day_trips_gpd, vienna_bb)

# Plot
berlin_segment_day_trips_gpd.plot(ax=ax, cmap="plasma", column='day_trips',
                                  legend=True, norm=norm)
ctx.add_basemap(ax, alpha=0.8, crs=4326,
                url=ctx.providers.CartoDB.DarkMatterNoLabels)
ax.set_title(f"Number of trips per day in Wien Buses Network on {date:%d %B %Y}",
             fontsize=30)
plt.savefig("Wien Buses Network_basemap.jpeg")

# Berlin
# --- Berlin: same trips-per-day pipeline as for Wien -------------------------
# NOTE(review): this section duplicates the Wien pipeline almost line by line;
# factoring the shared steps into functions would remove the copy-paste risk
# that caused the bugs fixed below.
with ZipFile("data/gtfs/Berlin2019/gtfs.zip") as myzip:
    shapes_df = pd.read_csv(myzip.open("shapes.txt"), dtype={
        'shape_id': 'str', 'shape_pt_lat': 'float', 'shape_pt_lon': 'float',
        'shape_pt_sequence': 'Int64', 'shape_dist_traveled': 'float',
    })
    shapes_gdf = gpd.GeoDataFrame(
        shapes_df,
        geometry=gpd.points_from_xy(shapes_df.shape_pt_lon, shapes_df.shape_pt_lat),
    ).set_crs(epsg=4326)
    stops_df = pd.read_csv(myzip.open("stops.txt"), dtype={
        'stop_id': 'str', 'stop_code': 'str', 'stop_name': 'str',
        'stop_lat': 'float', 'stop_lon': 'float', 'location_type': 'Int64',
        'parent_station': 'str', 'wheelchair_boarding': 'str',
        'platform_code': 'str',
    })
    stops_gdf = gpd.GeoDataFrame(
        stops_df,
        geometry=gpd.points_from_xy(stops_df.stop_lon, stops_df.stop_lat),
    ).set_crs(epsg=4326)
    routes_df = pd.read_csv(myzip.open("routes.txt"), dtype={
        'route_id': 'str', 'agency_id': 'str', 'route_short_name': 'str',
        'route_long_name': 'str', 'route_desc': 'str', 'route_type': 'Int64',
        'route_color': 'str', 'route_text_color': 'str', 'exact_times': 'bool',
    })
    trips_df = pd.read_csv(myzip.open("trips.txt"), dtype={
        'route_id': 'str', 'service_id': 'str', 'trip_id': 'str',
        'shape_id': 'str', 'trip_headsign': 'str', 'direction_id': 'str',
        'block_id': 'str', 'wheelchair_accessible': 'str',
        'route_direction': 'str', 'trip_note': 'str', 'bikes_allowed': 'str',
    })
    # FIX(review): duplicate 'stop_id' dtype entry and the no-op .astype({})
    # removed (same defect as in the Wien loading cell).
    stop_times_df = pd.read_csv(myzip.open("stop_times.txt"), dtype={
        'trip_id': 'str', 'arrival_time': 'str', 'departure_time': 'str',
        'stop_id': 'str', 'stop_sequence': 'Int64', 'stop_headsign': 'str',
        'pickup_type': 'Int64', 'drop_off_type': 'Int64',
        'shape_dist_traveled': 'float', 'timepoint': 'bool', 'stop_note': 'str',
    })
    agency_df = pd.read_csv(myzip.open("agency.txt"), dtype={
        'agency_id': 'str', 'agency_name': 'str', 'agency_url': 'str',
        'agency_timezone': 'str', 'agency_lang': 'str', 'agency_phone': 'str',
    })
    calendar_df = pd.read_csv(myzip.open("calendar.txt"), dtype={
        'service_id': 'str', 'monday': 'bool', 'tuesday': 'bool',
        'wednesday': 'bool', 'thursday': 'bool', 'friday': 'bool',
        'saturday': 'bool', 'sunday': 'bool',
        'start_date': 'str', 'end_date': 'str',
    })
    calendar_dates_df = pd.read_csv(myzip.open("calendar_dates.txt"), dtype={
        'service_id': 'str', 'date': 'str', 'exception_type': 'Int64',
    })

# services running on that day
show_date_str = "2019-12-15"
date = datetime.datetime.strptime(show_date_str, "%Y-%m-%d")
date_string = date.strftime("%Y%m%d")
day_of_week_name = date.strftime('%A').lower()
services_for_day_1 = calendar_df[
    (calendar_df[day_of_week_name])
    & (date_string >= calendar_df.start_date)
    & (date_string <= calendar_df.end_date)].service_id.to_numpy()
print(f"scheduled for day based on calendar: {len(services_for_day_1)}")
# exception_type: 1 = service added for the date, 2 = service removed
services_added_for_day = calendar_dates_df[
    (calendar_dates_df.date == date_string)
    & (calendar_dates_df.exception_type == 1)].service_id.to_numpy()
services_removed_for_day = calendar_dates_df[
    (calendar_dates_df.date == date_string)
    & (calendar_dates_df.exception_type == 2)].service_id.to_numpy()
print(f"services added using calendar_dates: {len(services_added_for_day)}")
print(f"services removed using calendar_dates: {len(services_removed_for_day)}")
services_for_day_2 = np.concatenate([services_for_day_1, services_added_for_day])
services_for_day = np.setdiff1d(services_for_day_2, services_removed_for_day)
print(f"final services for day: {len(services_for_day)}")

# pair every shape point with its predecessor to get line segments
coords = shapes_df[["shape_pt_lat", "shape_pt_lon", "shape_pt_sequence"]]
coords_roll_1 = np.roll(coords, 1, axis=0)
segments = pd.DataFrame(
    np.concatenate([coords_roll_1, coords], axis=1),
    columns=["start_lat", "start_lng", "start_seq", "end_lat", "end_lng", "end_seq"])
segments_df = shapes_df[["shape_id"]].join(segments)
# FIX(review): drop rows whose predecessor belongs to a different shape
# instead of assuming every shape_pt_sequence starts at 1 (GTFS only requires
# the sequence to be increasing) - same fix as in the Wien section.
same_shape = shapes_df["shape_id"].eq(shapes_df["shape_id"].shift()).to_numpy()
segments_df = segments_df[same_shape]
segments_df = segments_df.drop(columns=['end_seq']).rename(columns={"start_seq": "seq"})
segments_df

day_trips = trips_df[trips_df.service_id.isin(services_for_day)]
# NOTE(review): renamed from `sydney_bus_route_ids` (leftover of the Sydney
# example); it holds every route id in the Berlin feed, so the filter keeps
# all of the day's trips.
feed_route_ids = routes_df.route_id.unique()
shape_day_trips = (day_trips[day_trips.route_id.isin(feed_route_ids)]
                   .groupby(by="shape_id").size().to_frame("day_trips"))
shape_day_trips

# FIX(review): the merge was executed twice in the original; once suffices.
shape_segments_day_trips = pd.merge(segments_df, shape_day_trips,
                                    left_on="shape_id", right_index=True)
# how many trips occur on each segment per day (positional arg of GroupBy.sum
# is `numeric_only`; "day_trips" acts only as a truthy flag here)
segment_day_trips = shape_segments_day_trips.groupby(
    by=["start_lat", "start_lng", "end_lat", "end_lng"]).sum("day_trips").reset_index()

from shapely.geometry import Point, LineString

def get_line_string(row):
    """Return the straight line from the segment's start to its end point."""
    start = Point(row.start_lng, row.start_lat)
    end = Point(row.end_lng, row.end_lat)
    return LineString([start, end])

segment_day_trips['geometry'] = segment_day_trips.apply(get_line_string, axis=1)
# Convert to a GeoDataFrame, setting the geometry column
segment_day_trips_gpd = gpd.GeoDataFrame(segment_day_trips,
                                         geometry='geometry').set_crs(epsg=4326)
segment_day_trips_gpd

import osmnx as ox
city_name = "Berlin"
berlin_admin_boundary = ox.geocode_to_gdf(city_name)
berlin_admin_boundary = berlin_admin_boundary.to_crs("EPSG:4326")
# FIX(review): the original clipped `berlin_segment_day_trips_gpd`, which at
# this point still held the *Vienna* subset from the previous section, so the
# Berlin map came out effectively empty.  Clip the freshly built Berlin
# segments instead.
berlin_segment_day_trips_gpd = gpd.clip(segment_day_trips_gpd, berlin_admin_boundary)
berlin_segment_day_trips_gpd.plot()

from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib.colors import TwoSlopeNorm
import contextily as ctx

vmin = segment_day_trips_gpd["day_trips"].min()
vmax = segment_day_trips_gpd["day_trips"].quantile(.95)  # cap colour scale at the 95th percentile
vcenter = segment_day_trips_gpd["day_trips"].mean()
norm = TwoSlopeNorm(vmin=vmin, vcenter=vcenter, vmax=vmax)

# drop empty and degenerate (closed-ring) geometries before plotting
berlin_segment_day_trips_gpd = berlin_segment_day_trips_gpd[
    ~berlin_segment_day_trips_gpd.geometry.is_empty]
berlin_segment_day_trips_gpd = berlin_segment_day_trips_gpd[
    ~berlin_segment_day_trips_gpd.geometry.apply(lambda geom: geom.is_ring)]

fig = plt.figure(figsize=(30, 21))
ax = plt.axes()
ax.set(facecolor="black")
# Plot
berlin_segment_day_trips_gpd.plot(ax=ax, cmap="plasma", column='day_trips',
                                  legend=True, norm=norm)
# ctx.add_basemap(ax, alpha=0.8, crs=4326, url=ctx.providers.CartoDB.DarkMatterNoLabels)
ax.set_title(f"Number of trips per day in Berlin Buses Network on {date:%d %B %Y}",
             fontsize=30)
plt.savefig("Berlin Buses Network_withoutbasemap.jpeg")

# Osmnx prep data
osmnx_01.ipynb
# --- Download and cache the OSM drive networks -------------------------------
import osmnx as ox

# FIX(review): the Wien and Berlin cells were identical copy-paste; a loop
# produces exactly the same graphs, plots, prints and files, in the same order.
for place in ('Wien', 'Berlin'):
    G = ox.graph_from_place(place, network_type='drive')
    ox.plot_graph(G, bgcolor="w", node_color="r", edge_color="#aaa", figsize=(12, 12))
    print("node count:", len(G.nodes()))
    print("edge count:", len(G.edges()))
    # NOTE(review): saved as e.g. 'data/Wien_drive.graphml' (capitalised),
    # while the drive-time section loads 'wien_drive.graphml' from a zip -
    # confirm the on-disk file names actually match.
    ox.save_graphml(G, f'data/{place}_drive.graphml')

# Osmnx drive time
osmnx_02.ipynb
# --- Driving-time isochrone map, Vienna --------------------------------------
import osmnx as ox
from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile
import tempfile
import geopandas as gpd
import networkx as nx
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import contextily as ctx

origin_address = "Goldschmiedgasse 2, Wien, Austria"
# NOTE(review): hard-coded absolute Windows path - this only runs on the
# author's machine; a path relative to the notebook (e.g. data/...) would be
# portable.
zipurl = "C:/Users/leoni/Documents/Uni/Geopython/Final/data/wien_drive.graphml.zip"
# No need to download the file, just extract it from the provided path
with ZipFile(zipurl, 'r') as zfile:
    with tempfile.TemporaryDirectory() as tempdir:
        zfile.extractall(tempdir)
        G = ox.load_graphml(f"{tempdir}/wien_drive.graphml")

# add travel time based on maximum speed
G = ox.add_edge_speeds(G)
G = ox.add_edge_travel_times(G)
# change map projection to Pseudo-Mercator; this projection is used by most
# base map tile providers
G = ox.projection.project_graph(G, to_crs=3857)

# get edges as a GeoDataFrame
_, gdf_edges = ox.graph_to_gdfs(G)
fig = plt.figure(figsize=(20, 16))
ax = plt.axes()
ax.set(facecolor="black")
# FIX(review): the title claimed "more than 50 km/h" while the filter keeps
# speed_kph > 25; the title now matches what is actually plotted.  Confirm
# whether 25 or 50 km/h was the intended threshold.
ax.set_title("Roads in Vienna with maximum speed more than 25 km/h", fontsize=20)
gdf_edges[gdf_edges.speed_kph > 25].plot(ax=ax, cmap="viridis",
                                         column="speed_kph", legend=True)

# converts a string address into geographical coordinates (returns (lat, lon))
def geocode_address(address, crs=4326):
    geocode = gpd.tools.geocode(address, provider='nominatim',
                                user_agent="drive time demo").to_crs(crs)
    return (geocode.iloc[0].geometry.y, geocode.iloc[0].geometry.x)

# find origin network node (nearest_nodes expects x=lon, y=lat)
origin_coordinates = geocode_address(origin_address, crs=3857)
origin_node_id = ox.distance.nearest_nodes(G, origin_coordinates[1], origin_coordinates[0])

# find travel times and route to each node in the network
(travel_times, routes) = nx.single_source_dijkstra(G, origin_node_id, weight="travel_time")
# store the travel time in minutes on each reachable node
for node_id in travel_times:
    G.nodes.get(node_id)["travel_time"] = travel_times[node_id] / 60

# get nodes and edges as GeoDataFrames
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)
# longest travel time, in minutes (the original name `max_time_sec` was
# misleading - the value is divided by 60)
max_time_min = max(travel_times.values()) / 60
# colour scale: anchor the midpoint at a third of the maximum
norm = mcolors.TwoSlopeNorm(vmin=0, vcenter=max_time_min / 3, vmax=max_time_min)

# Create figure and axes
fig = plt.figure(figsize=(20, 20))
ax = plt.axes()
ax.set_axis_off()
# Add a colorbar showing travel time and the corresponding colour
cb = fig.colorbar(cm.ScalarMappable(norm=norm, cmap="plasma_r"),
                  ax=ax, orientation='horizontal')
cb.set_label('Driving time in minutes', fontsize=20)
# Plot each node with colour indicating travel time
gdf_nodes.plot(ax=ax, column="travel_time", cmap="plasma_r", norm=norm, s=10, alpha=.6)
# add basemap with location labels
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.DE, alpha=.3)
ctx.add_basemap(ax, source=ctx.providers.CartoDB.VoyagerOnlyLabels)
ax.set_title("Driving time in minutes from city centre, Vienna", fontsize=20)
plt.savefig("Driving_Time_Wien.png")

# Berlin
# --- Driving-time isochrone map, Berlin (mirror of the Vienna cell) ----------
origin_address = "Jüdenstraße 1, Berlin, Germany"
# NOTE(review): hard-coded absolute Windows path, see the Vienna cell.
zipurl = "C:/Users/leoni/Documents/Uni/Geopython/Final/data/berlin_drive.graphml.zip"
# No need to download the file, just extract it from the provided path
with ZipFile(zipurl, 'r') as zfile:
    with tempfile.TemporaryDirectory() as tempdir:
        zfile.extractall(tempdir)
        G = ox.load_graphml(f"{tempdir}/berlin_drive.graphml")

# add travel time based on maximum speed
G = ox.add_edge_speeds(G)
G = ox.add_edge_travel_times(G)
# change map projection to Pseudo-Mercator (used by most basemap tile providers)
G = ox.projection.project_graph(G, to_crs=3857)

# get edges as a GeoDataFrame
_, gdf_edges = ox.graph_to_gdfs(G)
fig = plt.figure(figsize=(20, 16))
ax = plt.axes()
ax.set(facecolor="black")
# FIX(review): title said "more than 50 km/h" while the filter keeps
# speed_kph > 25; aligned with what is plotted (see the Vienna cell).
ax.set_title("Roads in Berlin with maximum speed more than 25 km/h", fontsize=20)
gdf_edges[gdf_edges.speed_kph > 25].plot(ax=ax, cmap="viridis",
                                         column="speed_kph", legend=True)

# converts a string address into geographical coordinates (returns (lat, lon))
def geocode_address(address, crs=4326):
    geocode = gpd.tools.geocode(address, provider='nominatim',
                                user_agent="drive time demo").to_crs(crs)
    return (geocode.iloc[0].geometry.y, geocode.iloc[0].geometry.x)

# find origin network node
origin_coordinates = geocode_address(origin_address, crs=3857)
origin_node_id = ox.distance.nearest_nodes(G, origin_coordinates[1], origin_coordinates[0])

# find travel times and route to each node in the network
(travel_times, routes) = nx.single_source_dijkstra(G, origin_node_id, weight="travel_time")
# store the travel time in minutes on each reachable node
for node_id in travel_times:
    G.nodes.get(node_id)["travel_time"] = travel_times[node_id] / 60

# get nodes and edges as GeoDataFrames
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)
# longest travel time, in minutes (original `max_time_sec` name was misleading)
max_time_min = max(travel_times.values()) / 60
# colour scale: anchor the midpoint at a third of the maximum
norm = mcolors.TwoSlopeNorm(vmin=0, vcenter=max_time_min / 3, vmax=max_time_min)

# Create figure and axes
fig = plt.figure(figsize=(20, 20))
ax = plt.axes()
ax.set_axis_off()
# Add a colorbar showing travel time and the corresponding colour
cb = fig.colorbar(cm.ScalarMappable(norm=norm, cmap="plasma_r"),
                  ax=ax, orientation='horizontal')
cb.set_label('Driving time in minutes', fontsize=20)
# Plot each node with colour indicating travel time
gdf_nodes.plot(ax=ax, column="travel_time", cmap="plasma_r", norm=norm, s=10, alpha=.6)
# add basemap with location labels
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.DE, alpha=.3)
ctx.add_basemap(ax, source=ctx.providers.CartoDB.VoyagerOnlyLabels)
ax.set_title("Driving time in minutes from city centre, Berlin", fontsize=20)
plt.savefig("Driving_Time_Berlin.png")

# 4. Discussion / reflection
Add notes and discussion here — for example, about methodological challenges or data-related limitations that are relevant to your analysis and topic.
# NOTE(review): this cell duplicates the graph-loading step from the
# drive-time section above and appears to have been left in the Discussion
# section by accident; consider removing it from the final report.  The
# with-block's indentation was lost in the export and is restored here.
zipurl = "C:/Users/leoni/Documents/Uni/Geopython/Final/data/wien_drive.graphml.zip"
# No need to download the file, just extract it from the provided path
with ZipFile(zipurl, 'r') as zfile:
    with tempfile.TemporaryDirectory() as tempdir:
        zfile.extractall(tempdir)
        G = ox.load_graphml(f"{tempdir}/wien_drive.graphml")

# 5. References
List your references here — for example, the data sources (GTFS feeds, population rasters, OpenStreetMap) and the software libraries used in the analysis above.